In [1]:
import numpy as np
import pandas as pd
import torch
import boto3
from io import StringIO
from skimage import io
from skimage.transform import resize
from torch import nn
from sklearn.model_selection import train_test_split
In [2]:
class VGGNET(nn.Module):
def __init__(self):
super(VGGNET, self).__init__()
self.layer1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(64),nn.ReLU())
self.layer2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(64),nn.ReLU(), nn.MaxPool2d(kernel_size = 2, stride = 2))
self.layer3 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(128),nn.ReLU())
self.layer4 = nn.Sequential(nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(128),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
self.layer5 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(256),nn.ReLU())
self.layer6 = nn.Sequential(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(256),nn.ReLU())
self.layer7 = nn.Sequential(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(256),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
self.layer8 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
self.layer9 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
self.layer10 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
self.layer11 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
self.layer12 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(512),nn.ReLU())
self.layer13 = nn.Sequential(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
self.layer14 = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU())
self.layer15 = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU())
self.layer16 = nn.Sequential(nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1),nn.BatchNorm2d(1024),nn.ReLU(),nn.MaxPool2d(kernel_size = 2, stride = 2))
self.fc = nn.Sequential(nn.Dropout(0.5),nn.Linear(1024 * 3 * 3, 9216),nn.ReLU())
self.fc1 = nn.Sequential(nn.Dropout(0.5),nn.Linear(9216, 9216),nn.ReLU())
self.fc2= nn.Sequential(nn.Linear(9216, 1))
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = self.layer5(out)
out = self.layer6(out)
out = self.layer7(out)
out = self.layer8(out)
out = self.layer9(out)
out = self.layer10(out)
out = self.layer11(out)
out = self.layer12(out)
out = self.layer13(out)
out = self.layer14(out)
out = self.layer15(out)
out = self.layer16(out)
out = out.reshape(out.size(0), -1)
out = self.fc(out)
out = self.fc1(out)
out = self.fc2(out)
return out
In [ ]:
if __name__ == '__main__':
#________Global Parameters___________________
BATCH_SIZE = 32
EPOCH = 40
LEARNING_RATE = 0.005
#_____Loading Dataset from S3 Bucket__________
# Create S3 client
s3 = boto3.client("s3",aws_access_key_id='xxxxxxxxx',aws_secret_access_key='xxxxxxxxxxx')
# Store bucket name
bucket_name = "thyroid-cancer-bucket"
#objects_list = s3.list_objects_v2(Bucket=bucket_name).get("Contents") # Retrive all directories from bucket
#Dataset_Images = list(map(lambda x: x['Key'] , filter(lambda x: '.jpg' in x['Key'] , objects_list))) # Select only Images
Labels = pd.read_csv(StringIO(str(s3.get_object(Bucket=bucket_name, Key = 'Labels.csv')['Body'].read(),'utf-8'))) # read labels as csv file
Dataset_Images = ['Dataset/' + item for item in Labels['fileName'].to_numpy()] # read Images Name
X = [] # features Vector
Y = Labels['label'].to_numpy() # Labels
print('#Reading Dataset Images...')
# Iterate over every object in bucket
for data in Dataset_Images:
# Read an object from the bucket
img = np.frombuffer(s3.get_object(Bucket=bucket_name, Key=data)['Body'].read() , dtype = np.uint8) # convert Buffered string bytes to np image
img = (resize(img , (224,224)) * 255).astype(np.uint8).reshape((1,224,224))
X.append(img) # add to x array
X = np.array(X)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42) # Split Train and Test
model = VGGNET()
optimizer = torch.optim.Adam(model.parameters() , LEARNING_RATE) # Adam optimizer
Loss = nn.BCEWithLogitsLoss() # Binary Cross Entropy Loss function
for epoch in range(0,EPOCH):
temp_loss = 0
step = 0
temp_accuracy = 0
for batch in range(0 , len(X_train) , BATCH_SIZE): # train phase
Data = torch.from_numpy(X_train[batch : batch + BATCH_SIZE]).float()
labels = torch.from_numpy(y_train[batch : batch + BATCH_SIZE]).float()
predicted = model(Data).squeeze(1) # get model outputs
loss = Loss(predicted , labels) #calc binary cross entropy loss function
temp_loss += loss.item() # accumulative loss values
predicted = (predicted.data > 0.5).float() # check model predicted values
correct = (predicted == labels).sum().item() #correct Answers
temp_accuracy += (100 * (correct / len(labels))) # accumulative accuracy value in each batch data
step += 1
model.zero_grad() # ignore gradient history
#loss.requires_grad = True
loss.backward() # backward like back-propagation
optimizer.step() # save weights
print('Training Phase - Epoch # ' , str(epoch+1) , ', Loss : ' , str(temp_loss / step) , ' , Accuracy Value is : ' , str(temp_accuracy / step)+'%')
#______________Test Phase___________________
with torch.no_grad(): # Stop Updating Weights
true = 0
X_test = torch.from_numpy(X_test).float() # test data
Y_test = torch.from_numpy(y_test).float() # test labels
outputs = model(X_test).squeeze(1) # Test with 20% of Data
predicted = (outputs.data > 0.5).float() # Predicted Labels
del outputs # free Ram Space
true = (predicted == Y_test).sum().item() # correct Answers
print('The Model Accuracy Is : ' , str(100 * (true / len(Y_test))) + '%') # Print Final Accuracy
#Reading Dataset Images... Training Phase - Epoch # 0 , Loss : 160.028329372406 , Accuracy Value is : 59.10326086956522% Training Phase - Epoch # 1 , Loss : 8.938586235046387 , Accuracy Value is : 70.27475845410628% Training Phase - Epoch # 2 , Loss : 2.282831016514036 , Accuracy Value is : 79.58937198067633% Training Phase - Epoch # 3 , Loss : 0.572899791929457 , Accuracy Value is : 57.57850241545894% Training Phase - Epoch # 4 , Loss : 0.6044369273715549 , Accuracy Value is : 82.01992753623189% Training Phase - Epoch # 5 , Loss : 0.5367798573440976 , Accuracy Value is : 81.67270531400966% Training Phase - Epoch # 7 , Loss : 0.44552314281463623 , Accuracy Value is : 82.71437198067633% Training Phase - Epoch # 8 , Loss : 0.4366236974795659 , Accuracy Value is : 82.71437198067633% Training Phase - Epoch # 9 , Loss : 0.43553534812397426 , Accuracy Value is : 82.71437198067633% Training Phase - Epoch # 10 , Loss : 0.44049228231112164 , Accuracy Value is : 82.3671497584541% Training Phase - Epoch # 11 , Loss : 0.4208071033159892 , Accuracy Value is : 83.40881642512078% Training Phase - Epoch # 12 , Loss : 0.4205107589562734 , Accuracy Value is : 81.67270531400966% Training Phase - Epoch # 13 , Loss : 0.4215390847788917 , Accuracy Value is : 83.40881642512078% Training Phase - Epoch # 14 , Loss : 0.4123901708258523 , Accuracy Value is : 82.3671497584541% Training Phase - Epoch # 15 , Loss : 0.40886599653297 , Accuracy Value is : 83.40881642512078% Training Phase - Epoch # 16 , Loss : 0.40121200680732727 , Accuracy Value is : 84.10326086956522% Training Phase - Epoch # 17 , Loss : 0.39108316269185806 , Accuracy Value is : 84.10326086956522% Training Phase - Epoch # 18 , Loss : 0.3933882961670558 , Accuracy Value is : 84.45048309178745% Training Phase - Epoch # 19 , Loss : 0.38997334738572437 , Accuracy Value is : 83.54468599033817% Training Phase - Epoch # 20 , Loss : 0.39450888997978634 , Accuracy Value is : 83.54468599033817% Training Phase - Epoch # 21 , Loss : 0.39227795600891113 , Accuracy Value is : 83.19746376811594% Training Phase - Epoch # 22 , Loss : 0.39049198230107623 , Accuracy Value is : 83.756038647343% Training Phase - Epoch # 23 , Loss : 0.35965031882127124 , Accuracy Value is : 88.54166666666667% Training Phase - Epoch # 24 , Loss : 0.36477796650595135 , Accuracy Value is : 87.71135265700484% Training Phase - Epoch # 25 , Loss : 0.33614182968934375 , Accuracy Value is : 88.7530193236715% Training Phase - Epoch # 26 , Loss : 0.3366866161425908 , Accuracy Value is : 89.58333333333333% Training Phase - Epoch # 27 , Loss : 0.3477441966533661 , Accuracy Value is : 87.15277777777777% Training Phase - Epoch # 28 , Loss : 0.3386049485868878 , Accuracy Value is : 89.44746376811594% Training Phase - Epoch # 31 , Loss : 0.29728467100196415 , Accuracy Value is : 89.65881642512078% Training Phase - Epoch # 32 , Loss : 0.2762363735172484 , Accuracy Value is : 88.6171497584541% Training Phase - Epoch # 33 , Loss : 0.2542361105173212 , Accuracy Value is : 88.6191235690112% Training Phase - Epoch # 34 , Loss : 0.31613292954934215 , Accuracy Value is : 88.7490193189715% Training Phase - Epoch # 35 , Loss : 0.3366865221425908 , Accuracy Value is : 89.58333333333333% Training Phase - Epoch # 36 , Loss : 0.29706467100162402 , Accuracy Value is : 89.59021156784321% Training Phase - Epoch # 37 , Loss : 0.28146235600256411 , Accuracy Value is : 89.59137836281319% Training Phase - Epoch # 38 , Loss : 0.28012345210252215 , Accuracy Value is : 89.59172821292521% Training Phase - Epoch # 39 , Loss : 0.27113285000226312 , Accuracy Value is : 89.59187325142561% Training Phase - Epoch # 40 , Loss : 0.27102235400166413 , Accuracy Value is : 89.59201236370252% The Model Accuracy Is : 90.00213443567643 %